library(ngsReports)
library(magrittr)
library(scales)
library(pander)
library(tidyverse)
theme_set(theme_bw())

# Load the FastQC reports for the demultiplexed and the aligned libraries.
# `pattern` is a regular expression, so it is anchored to the ".zip" extension;
# a bare "zip" would also match any file name that merely contains those
# letters (e.g. "sample_fastqc.zip.md5").
deMuxFqc <- list.files(
  "../2_demux/FastQC/",
  pattern = "\\.zip$",
  full.names = TRUE
) %>%
  getFastqcData()
alnFqc <- list.files(
  "../4_aligned/FastQC/",
  pattern = "\\.zip$",
  full.names = TRUE
) %>%
  getFastqcData()

# Object supplied as `GCobject` to the GC content plots further down
oryGC <- read_rds("oryGC.RDS")
Comparison of library sizes before and after alignment. Some degree of multiple alignment was observed in all libraries.
# Summarise the aligned library sizes per population and render the result
# as a table.
readTotals(alnFqc) %>%
  mutate(
    # Strip the literal BAM suffix. fixed() is needed because a bare string is
    # interpreted as a regular expression in which every "." matches any
    # character, not just a literal dot.
    Sample = str_remove(Filename, fixed(".sorted.bam")),
    # Population label is assigned from tags found in the sample name;
    # anything that carries neither "gc" nor "ora" falls through to "2010".
    Population = case_when(
      grepl("gc", Sample) ~ "1996",
      grepl("ora", Sample) ~ "2012",
      TRUE ~ "2010"
    )
  ) %>%
  dplyr::select(Sample, Population, Total_Sequences) %>%
  group_by(Population) %>%
  summarise(
    Samples = n(),
    `Smallest Library` = min(Total_Sequences),
    `Median Library` = median(Total_Sequences),
    `Largest Library` = max(Total_Sequences),
    `Total Alignments` = sum(Total_Sequences)
  ) %>%
  pander(
    big.mark = ",",
    split.tables = Inf,
    justify = "rrrrrr",
    caption = "Summary of Library Sizes After Alignment"
  )
| Population | Samples | Smallest Library | Median Library | Largest Library | Total Alignments |
|---|---|---|---|---|---|
| 1996 | 59 | 2,097,881 | 6,869,250 | 20,114,691 | 458,139,215 |
| 2010 | 37 | 1,556,040 | 5,155,139 | 9,391,888 | 203,789,421 |
| 2012 | 53 | 4,395,062 | 8,564,072 | 18,132,894 | 472,659,756 |
# File names of the alignments treated as potential low quality samples
lowQ <- paste0(
  c("pt1125", "gc2709", "gc2700", "gc2776", "ora663"),
  ".sorted.bam"
)
Potential low-quality samples were identified by GC content as pt1125.sorted.bam, gc2709.sorted.bam, gc2700.sorted.bam, gc2776.sorted.bam and ora663.sorted.bam.
# GC content line plot (plotly) restricted to the libraries listed in `lowQ`
plotGcContent(
  alnFqc[fileName(alnFqc) %in% lowQ],
  plotType = "line",
  usePlotly = TRUE,
  GCobject = oryGC,
  species = "Ocuniculus"
)
GC content of potential low quality samples
# GC content line plot (plotly) for every library that is not listed in `lowQ`
plotGcContent(
  alnFqc[!fileName(alnFqc) %in% lowQ],
  plotType = "line",
  usePlotly = TRUE,
  GCobject = oryGC,
  species = "Ocuniculus"
)
GC content of remaining samples
Alignments from the potential low-quality samples identified above should be moved and placed into a separate folder to ensure their exclusion from the Stacks pipeline.
# Record the R session details for this report
pander(sessionInfo())
R version 3.5.2 (2018-12-20)
**Platform:** x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_AU.UTF-8, LC_NUMERIC=C, LC_TIME=en_AU.UTF-8, LC_COLLATE=en_AU.UTF-8, LC_MONETARY=en_AU.UTF-8, LC_MESSAGES=en_AU.UTF-8, LC_PAPER=en_AU.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_AU.UTF-8 and LC_IDENTIFICATION=C
attached base packages: stats4, parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: bindrcpp(v.0.2.2), forcats(v.0.3.0), stringr(v.1.3.1), dplyr(v.0.7.8), purrr(v.0.2.5), readr(v.1.3.1), tidyr(v.0.8.2), tidyverse(v.1.2.1), pander(v.0.6.3), scales(v.1.0.0), magrittr(v.1.5), ngsReports(v.0.99.0), tibble(v.2.0.0), ggplot2(v.3.1.0), fastqcTheoreticalGC(v.0.0.1), BSgenome(v.1.50.0), rtracklayer(v.1.42.1), Biostrings(v.2.50.2), XVector(v.0.22.0), GenomicRanges(v.1.34.0), GenomeInfoDb(v.1.18.1), IRanges(v.2.16.0), S4Vectors(v.0.20.1) and BiocGenerics(v.0.28.0)
loaded via a namespace (and not attached): nlme(v.3.1-137), bitops(v.1.0-6), matrixStats(v.0.54.0), lubridate(v.1.7.4), RColorBrewer(v.1.1-2), httr(v.1.4.0), tools(v.3.5.2), backports(v.1.1.3), R6(v.2.3.0), lazyeval(v.0.2.1), colorspace(v.1.3-2), withr(v.2.1.2), tidyselect(v.0.2.5), compiler(v.3.5.2), cli(v.1.0.1), rvest(v.0.3.2), Biobase(v.2.42.0), Cairo(v.1.5-9), xml2(v.1.2.0), DelayedArray(v.0.8.0), plotly(v.4.8.0), ggdendro(v.0.1-20), labeling(v.0.3), digest(v.0.6.18), Rsamtools(v.1.34.0), rmarkdown(v.1.11), pkgconfig(v.2.0.2), htmltools(v.0.3.6), highr(v.0.7), htmlwidgets(v.1.3), rlang(v.0.3.0.1), readxl(v.1.2.0), rstudioapi(v.0.8), shiny(v.1.2.0), bindr(v.0.1.1), generics(v.0.0.2), zoo(v.1.8-4), hwriter(v.1.3.2), jsonlite(v.1.6), crosstalk(v.1.0.0), BiocParallel(v.1.16.5), RCurl(v.1.95-4.11), GenomeInfoDbData(v.1.2.0), Matrix(v.1.2-15), Rcpp(v.1.0.0), munsell(v.0.5.0), stringi(v.1.2.4), yaml(v.2.2.0), MASS(v.7.3-51.1), SummarizedExperiment(v.1.12.0), zlibbioc(v.1.28.0), plyr(v.1.8.4), grid(v.3.5.2), promises(v.1.0.1), crayon(v.1.3.4), lattice(v.0.20-38), haven(v.2.0.0), hms(v.0.4.2), knitr(v.1.21), pillar(v.1.3.1), XML(v.3.98-1.16), glue(v.1.3.0), evaluate(v.0.12), ShortRead(v.1.40.0), latticeExtra(v.0.6-28), data.table(v.1.11.8), modelr(v.0.1.2), httpuv(v.1.4.5.1), cellranger(v.1.1.0), gtable(v.0.2.0), assertthat(v.0.2.0), xfun(v.0.4), mime(v.0.6), xtable(v.1.8-3), broom(v.0.5.1), later(v.0.7.5), viridisLite(v.0.3.0) and GenomicAlignments(v.1.18.1)